In [1]:
# Import files
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import time
import random
import math
import pandas as pd
import sklearn
from scipy import misc
import glob
import pickle
%matplotlib inline
plt.style.use('ggplot')
In [ ]:
# ReLU layer with Glorot initialization
def fully_connected_layer_relu(inputs, input_dim, output_dim, nonlinearity=tf.nn.relu):
    weights = tf.Variable(
        tf.truncated_normal(
            # Glorot initialization: stddev = sqrt(2 / (fan_in + fan_out))
            [input_dim, output_dim], stddev=(2. / (input_dim + output_dim))**0.5),
        name='weights')
    biases = tf.Variable(tf.zeros([output_dim]), name='biases')
    outputs = nonlinearity(tf.matmul(inputs, weights) + biases)
    return outputs

# Exponential Linear Units (ELUs) as the activation, to test the performance difference
def fully_connected_layer_elu(inputs, input_dim, output_dim, nonlinearity=tf.nn.elu):
    weights = tf.Variable(
        tf.truncated_normal(
            [input_dim, output_dim], stddev=(2. / (input_dim + output_dim))**0.5),
        name='weights')
    biases = tf.Variable(tf.zeros([output_dim]), name='biases')
    outputs = nonlinearity(tf.matmul(inputs, weights) + biases)
    return outputs
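# A quick usage sketch (an added example, not in the original notebook): either helper builds a
# single dense layer; here a hypothetical batch of 784-dimensional vectors is mapped to 100 units.
example_x = tf.placeholder(tf.float32, [None, 784], name='example_x')
example_hidden_relu = fully_connected_layer_relu(example_x, 784, 100)
example_hidden_elu = fully_connected_layer_elu(example_x, 784, 100)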
In [3]:
# Dropout Layer
# https://www.tensorflow.org/api_docs/python/tf/nn/dropout
# NOTE: in this TensorFlow 1.x API the second argument is the *keep* probability,
# so the default of 0.5 keeps (and drops) half of the activations
def dropout(inputs, keep_prob=0.5):
    # Uses TensorFlow's dropout op
    return tf.nn.dropout(inputs, keep_prob)
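# A usage sketch (an added example, not in the original notebook): feeding the keep probability
# through a placeholder lets the same graph use e.g. 0.5 during training and 1.0 at test time.
example_keep_prob = tf.placeholder(tf.float32, name='example_keep_prob')
example_activations = tf.placeholder(tf.float32, [None, 4096], name='example_activations')
example_dropped = dropout(example_activations, example_keep_prob)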
In [4]:
# ksize   = kernel size
# strides = strides for the pooling operation
# padding = padding style for the pooling operation
# NOTE: you can modify these according to your project requirements
# Max Pooling Layer
# https://www.tensorflow.org/api_docs/python/tf/nn/max_pool
def max_pool(inputs, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID'):
    return tf.nn.max_pool(inputs, ksize, strides, padding, name='max_pool')

# Average Pooling Layer
# https://www.tensorflow.org/api_docs/python/tf/nn/avg_pool
def avg_pool(inputs, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID'):
    return tf.nn.avg_pool(inputs, ksize, strides, padding, name='avg_pool')
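# Output-size arithmetic for the defaults above (an added note, not in the original notebook):
# with VALID padding, out = floor((in - kernel) / stride) + 1,
# e.g. a 56 x 56 feature map pooled with a 3 x 3 kernel and stride 2 becomes 27 x 27.
print((56 - 3) // 2 + 1)  # -> 27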
In [ ]:
# See https://www.tensorflow.org/versions/r0.12/api_docs/python/nn/normalization
# for documentation of the normalization arguments
# The depth radius is 2 in some other AlexNet implementations
def local_response_normalization(inputs, depth_radius=4, bias=1.0, alpha=0.001/9.0, beta=0.75):
    # Op names may not contain spaces, so use an underscore-separated name
    return tf.nn.lrn(inputs, depth_radius, bias, alpha, beta, name='local_response_normalization')
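# A NumPy reference of what tf.nn.lrn computes (an added sketch, not in the original notebook):
# b[i] = a[i] / (bias + alpha * sum_{j = i-r .. i+r} a[j]**2) ** beta, summed over the channel axis.
def lrn_reference(a, depth_radius=4, bias=1.0, alpha=0.001/9.0, beta=0.75):
    # a is a float array of shape [batch, height, width, channels]
    out = np.zeros_like(a)
    channels = a.shape[-1]
    for i in range(channels):
        lo, hi = max(0, i - depth_radius), min(channels, i + depth_radius + 1)
        denom = (bias + alpha * np.sum(a[..., lo:hi] ** 2, axis=-1)) ** beta
        out[..., i] = a[..., i] / denom
    return out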
In [ ]:
# https://www.tensorflow.org/api_docs/python/tf/nn/conv2d
def conv(inputs, weights, strides=[1, 1, 1, 1], padding='SAME'):
    return tf.nn.conv2d(inputs, weights, strides, padding)
In [ ]:
# NOTE: the initial weights and biases are created with a given shape and size,
# so they can be modified according to the requirements of your project;
# the weights are drawn from a truncated normal distribution
# For details on truncated normal: https://www.tensorflow.org/api_docs/python/tf/truncated_normal
# Initializing weights with a truncated normal distribution
def initial_weights_for_layers(shape, stdev=5e-2):
    return tf.Variable(tf.truncated_normal(shape, stddev=stdev), name='initial_weights')

# Initializing biases
def initial_biases(size, value):
    return tf.Variable(tf.constant(value, shape=[size]), name='initial_biases')

# ReLU activation
def relu_activation(inputs):
    return tf.nn.relu(inputs, name='RELU')
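# A quick usage sketch (an added example, not in the original notebook): a 3 x 3 filter bank
# with 3 input channels and 16 output channels, plus its biases.
example_weights = initial_weights_for_layers(shape=[3, 3, 3, 16])
example_biases = initial_biases(size=16, value=0.0)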
In [5]:
# We assume the images are stored in a NumPy array 'inputs'
# NOTE: no images are actually loaded as inputs here
# For an implementation of this architecture tested on different datasets such as MNIST, SVHN, etc.,
# check out the 'Machine Learning' repository on my GitHub for updates and implementations
# The paper takes the input size to be 224 x 224 x 3
In [ ]:
# We name the input images 'inputs' and the outputs 'outputs'
# For this notebook they are effectively empty placeholders, since this is only a description of the structure
# For adding the biases, we use: https://www.tensorflow.org/api_docs/python/tf/nn/bias_add
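# A minimal sketch of the assumed input placeholder (an addition, not in the original notebook):
# a batch of 224 x 224 RGB images, matching the input size quoted from the paper above.
inputs = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name='inputs')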
# First Convolutional Layer
with tf.name_scope('Convolution_1'):
    # Initializing weights and biases for this layer
    weights1 = initial_weights_for_layers(shape=[11, 11, 3, 96])
    biases1 = initial_biases(size=96, value=0.0)
    # Convolution-1
    # The first layer filters the input with 96 kernels of size 11 x 11 x 3
    # conv_o is just a temporary holding the raw convolution output
    conv_o = conv(inputs, weights1, [1, 4, 4, 1], padding='SAME')
    # Adding the biases
    conv_o = tf.nn.bias_add(conv_o, biases1)
    # ReLU activation
    conv1 = relu_activation(conv_o)
    # Normalization, takes conv1 as input
    norm1 = local_response_normalization(conv1)
    # Pooling, takes norm1 as input
    pool1 = max_pool(norm1)
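# Shape check (an added note, assuming the hypothetical 224 x 224 x 3 placeholder above):
# the SAME-padded stride-4 convolution gives ceil(224 / 4) = 56, so conv1 is (?, 56, 56, 96);
# the VALID 3 x 3 pool with stride 2 gives floor((56 - 3) / 2) + 1 = 27, so pool1 is (?, 27, 27, 96).
print(pool1.get_shape())  # expected: (?, 27, 27, 96)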
# The initial bias value here is 0.1 rather than 0.0 to test the difference;
# other implementations initialize it to 0.0, which you can change accordingly
# Second Convolutional Layer
with tf.name_scope('Convolution_2'):
    # Initializing weights and biases for this layer
    # The paper lists 5 x 5 x 48 kernels because the 96 conv1 maps are split across two GPUs;
    # in this single-stream version each filter sees all 96 input channels
    weights2 = initial_weights_for_layers(shape=[5, 5, 96, 256])
    biases2 = initial_biases(size=256, value=0.1)
    # Convolution-2
    # The second layer filters pool1 with 256 kernels of size 5 x 5 (x 48 per GPU in the paper)
    # conv_o is just a temporary holding the raw convolution output
    conv_o = conv(pool1, weights2, [1, 1, 1, 1], padding='SAME')
    # Adding the biases
    conv_o = tf.nn.bias_add(conv_o, biases2)
    # ReLU activation
    conv2 = relu_activation(conv_o)
    # Normalization, takes conv2 as input
    norm2 = local_response_normalization(conv2)
    # Pooling, takes norm2 as input
    pool2 = max_pool(norm2)
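# Shape check (an added note, continuing the 224 x 224 assumption): the stride-1 SAME convolution
# keeps 27 x 27, so conv2 is (?, 27, 27, 256); pooling gives floor((27 - 3) / 2) + 1 = 13,
# so pool2 is (?, 13, 13, 256).
print(pool2.get_shape())  # expected: (?, 13, 13, 256)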
###
# NOTE: there is no normalization or pooling in the 3rd, 4th and 5th convolution layers here
# (the original paper does apply max pooling after the 5th convolution, before the fully connected layers)
###
# Third Convolutional Layer
with tf.name_scope('Convolution_3'):
    # Initializing weights and biases for this layer
    weights3 = initial_weights_for_layers(shape=[3, 3, 256, 384])
    biases3 = initial_biases(size=384, value=0.0)
    # Convolution-3
    # The third layer filters pool2 with 384 kernels of size 3 x 3 x 256
    # conv_o is just a temporary holding the raw convolution output
    conv_o = conv(pool2, weights3, [1, 1, 1, 1], padding='SAME')
    # Adding the biases
    conv_o = tf.nn.bias_add(conv_o, biases3)
    # ReLU activation
    conv3 = relu_activation(conv_o)
# Fourth Convolutional Layer
with tf.name_scope('Convolution_4'):
    # Initializing weights and biases for this layer
    # The paper lists 3 x 3 x 192 kernels (per GPU); the single-stream filters see all 384 channels
    weights4 = initial_weights_for_layers(shape=[3, 3, 384, 384])
    biases4 = initial_biases(size=384, value=0.0)
    # Convolution-4
    # The fourth layer filters conv3 with 384 kernels of size 3 x 3 (x 192 per GPU in the paper)
    # conv_o is just a temporary holding the raw convolution output
    conv_o = conv(conv3, weights4, [1, 1, 1, 1], padding='SAME')
    # Adding the biases
    conv_o = tf.nn.bias_add(conv_o, biases4)
    # ReLU activation
    conv4 = relu_activation(conv_o)
# Fifth Convolutional Layer
with tf.name_scope('Convolution_5'):
    # Initializing weights and biases for this layer
    # The paper lists 3 x 3 x 192 kernels (per GPU); the single-stream filters see all 384 channels
    weights5 = initial_weights_for_layers(shape=[3, 3, 384, 256])
    biases5 = initial_biases(size=256, value=0.0)
    # Convolution-5
    # The fifth layer filters conv4 with 256 kernels of size 3 x 3 (x 192 per GPU in the paper)
    # conv_o is just a temporary holding the raw convolution output
    conv_o = conv(conv4, weights5, [1, 1, 1, 1], padding='SAME')
    # Adding the biases
    conv_o = tf.nn.bias_add(conv_o, biases5)
    # ReLU activation
    conv5 = relu_activation(conv_o)
# Reshaping the output of the previous layers so it can be passed to the fully connected layers:
# we want to separate the dimensions into (batch_size, flattened_features), where batch_size is inferred dynamically
# REFER TO: https://www.tensorflow.org/tutorials/layers
# to understand how a neural network is assembled and hence why this operation is needed
# NOTE: instead of the original AlexNet dimensions, for ease these are variables
# that you can change to suit the requirements of your project;
# this should help you see how the dimensions vary across different datasets
a = 8   # this is just an example
b = 8   # this is just an example
c = 64  # this is just an example
# These can be understood as the height, width and channel dimensions of conv5's output
reshaped_inputs = tf.reshape(conv5, [-1, a*b*c])
# NOTE: the a, b, c parameters may differ from the original implementation
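# A sketch of reading the flattened size from the graph instead of hard-coding it (an added note;
# under the hypothetical 224 x 224 x 3 placeholder above this gives 13 * 13 * 256 = 43264):
conv5_shape = conv5.get_shape().as_list()  # [None, height, width, channels]
flattened_size = conv5_shape[1] * conv5_shape[2] * conv5_shape[3]
print(flattened_size)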
###
# NOTE: the first two fully connected layers have 4096 units each
# NOTE: dropout is used in the first two fully connected layers
###
# First Fully Connected Layer
with tf.name_scope('Fully_Connected_1'):
    fc = fully_connected_layer_relu(reshaped_inputs, a*b*c, 4096)
    fc1 = dropout(fc)
# Second Fully Connected Layer
with tf.name_scope('Fully_Connected_2'):
    fc = fully_connected_layer_relu(fc1, 4096, 4096)
    fc2 = dropout(fc)
# Third Fully Connected Layer
# This layer maps the 4096 features to the 1000 output classes;
# tf.identity keeps it linear so the softmax below receives raw logits
with tf.name_scope('Fully_Connected_3'):
    fc3 = fully_connected_layer_relu(fc2, 4096, 1000, nonlinearity=tf.identity)
# The 1000-way logits are passed through a softmax classifier
# https://www.tensorflow.org/api_docs/python/tf/nn/softmax
with tf.name_scope('Softmax'):
    fc4 = tf.nn.softmax(fc3, name='softmax')
with tf.name_scope('Outputs'):
    outputs = fc4
In [ ]:
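# A minimal training-objective sketch (an addition, not in the original notebook): it assumes a
# hypothetical 'labels' placeholder of one-hot targets and uses the pre-softmax logits 'fc3',
# since tf.nn.softmax_cross_entropy_with_logits expects unnormalized logits rather than 'outputs'.
labels = tf.placeholder(tf.float32, shape=[None, 1000], name='labels')
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=fc3))
train_step = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cross_entropy)
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(labels, 1)), tf.float32))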